/*This script creates the main analysis datasets for the proof-of-concept study in Section 4
of the Cambridge Elements.*/

//We used Stata/SE 18.0 for Mac (Intel 64-bit), Revision 25 Apr 2023



///////////////////////////////////
//Prepare constituency-level data
///////////////////////////////////

//Load the unemployment workbook from the House of Commons Library
import excel "./House of Commons Library data/Unemployment.xlsx", sheet("Data") firstrow clear

//Restrict to the 1 November 2019 release (the date closest to the survey)
//and retain only the constituency name and its unemployment rate
keep if DateOfDataset == td(01nov2019)
keep ConstituencyName UnempConstRate

//Give both columns their project-wide names and rescale the rate by 100
rename (ConstituencyName UnempConstRate) (constituency unemployment)
replace unemployment = unemployment * 100

//Align two constituency spellings with the other sources so later merges match
replace constituency = "Na h-Eileanan an Iar" if constituency == "Na h-Eileanan An Iar"
replace constituency = "Carmarthen West and South Pembrokeshire" if constituency == "Carmarthen West and Pembrokeshire South"

//Attach variable labels
label variable unemployment "Constituency unemployment rate (HoC)"
label variable constituency "Name of respondent's constituency"

//Store as the first version of the constituency-level file
save "./Created auxiliary datasets/constituency_level_data.dta", replace

//Load the 2017 constituency results from the House of Commons Library (https://researchbriefings.parliament.uk/ResearchBriefing/Summary/CBP-7979)
import delimited "./House of Commons Library data/HoC-GE2017-constituency-results.csv", clear 

//Add the 2019 results, matched on the ONS constituency id
merge 1:1 ons_id using "./House of Commons Library data/2019_results.dta", nogenerate

//Use the project-wide name and label for the constituency variable
rename constituency_name constituency
label variable constituency "Name of respondent's constituency"

//Overwrite majority and valid votes with the by-election figures
//(Wikipedia: https://en.wikipedia.org/wiki/List_of_United_Kingdom_by-elections_(2010–present))
//The three lists below are parallel: i-th seat, i-th majority, i-th number of valid votes
local by_seats `""Brecon and Radnorshire" "Peterborough" "Newport West" "Lewisham East" "West Tyrone""'
local by_majorities 1425 683 1951 5629 7956
local by_votes 31814 33920 23515 22056 35337
forvalues i = 1/5 {
	local seat : word `i' of `by_seats'
	local maj : word `i' of `by_majorities'
	local votes : word `i' of `by_votes'
	replace majority = `maj' if constituency == "`seat'"
	replace valid_votes = `votes' if constituency == "`seat'"
}

//Marginality: majority as a percentage of valid votes
gen majority_size = majority/valid_votes * 100
label variable majority_size "Chosen MP's electoral majority (% of votes)"

//Flag constituencies where the 2017 winning party is also the 2019 winning party
gen won_again = (first_party == first_party_2019)
label variable won_again "Binary indicator whether party won constituency again"

//Reduce to the merge key and the two derived indicators
keep constituency majority_size won_again

//Combine with the unemployment data saved earlier and overwrite the constituency-level file
merge 1:1 constituency using "./Created auxiliary datasets/constituency_level_data.dta", nogenerate
save "./Created auxiliary datasets/constituency_level_data.dta", replace

//Load Brexit vote data
import excel "./Other data sources/eureferendum_constituency.xlsx", sheet("DATA") firstrow clear

//Harmonise the merge key and discard rows that carry no ONS identifier
rename Constituency constituency
drop if ONSID == ""

//Merge with constituency-level data.
//The using file is unique by constituency (it was built with a 1:1 merge on
//constituency), and the Brexit sheet is expected to hold one row per constituency.
//A 1:1 merge makes Stata stop with an error if that expectation is violated,
//whereas the previous m:m merge would have paired duplicate rows by position
//without any warning.
merge 1:1 constituency using "./Created auxiliary datasets/constituency_level_data.dta"
drop _merge

//Keep only relevant variables
rename FIGURETOUSE brexit_vote
keep constituency majority_size unemployment brexit_vote won_again
label variable brexit_vote "Pro-Brexit vote share in the referendum by constituency (Hanretty 2017)"

//Save data
save "./Created auxiliary datasets/constituency_level_data.dta", replace



/////////////////////////
//Prepare MP-level data
/////////////////////////

//Load the 2017 candidate-level results (source of MP gender) from the House of Commons Library (https://researchbriefings.parliament.uk/ResearchBriefing/Summary/CBP-7979)
import delimited "./House of Commons Library data/HoC-GE2017-results-by-candidate.csv", clear 

//Build the full name used as merge key throughout the script
gen representative = firstname + " " + surname

//Apply project-wide variable names and keep only what is needed
rename gender gender_MP
rename constituency_name constituency
keep representative gender_MP constituency
label variable representative "Name of MP"
label variable gender_MP "Gender of MP"
label variable constituency "Name of respondent's constituency"

//Harmonise constituency spellings (capitalisation of "of", "upon", "under", "on", "an")
replace constituency = "Ashton-under-Lyne" if constituency == "Ashton-Under-Lyne"
replace constituency = "Berwick-upon-Tweed" if constituency == "Berwick-Upon-Tweed"
replace constituency = "Cities of London and Westminster" if constituency == "Cities Of London and Westminster"
replace constituency = "City of Chester" if constituency == "City Of Chester"
replace constituency = "City of Durham" if constituency == "City Of Durham"
replace constituency = "Forest of Dean" if constituency == "Forest Of Dean"
replace constituency = "Isle of Wight" if constituency == "Isle Of Wight"
replace constituency = "Na h-Eileanan an Iar" if constituency == "Na h-Eileanan An Iar"
replace constituency = "Newcastle upon Tyne Central" if constituency == "Newcastle Upon Tyne Central"
replace constituency = "Newcastle upon Tyne East" if constituency == "Newcastle Upon Tyne East"
replace constituency = "Newcastle upon Tyne North" if constituency == "Newcastle Upon Tyne North"
replace constituency = "Newcastle-under-Lyme" if constituency == "Newcastle-Under-Lyme"
replace constituency = "Stoke-on-Trent Central" if constituency == "Stoke-On-Trent Central"
replace constituency = "Stoke-on-Trent North" if constituency == "Stoke-On-Trent North"
replace constituency = "Stoke-on-Trent South" if constituency == "Stoke-On-Trent South"
replace constituency = "Stratford-on-Avon" if constituency == "Stratford-On-Avon"
replace constituency = "Vale of Clwyd" if constituency == "Vale Of Clwyd"
replace constituency = "Vale of Glamorgan" if constituency == "Vale Of Glamorgan"

//Harmonise representative names: remove a doubled space between first name and surname
replace representative = "Mohammad Yasin" if representative == "Mohammad  Yasin"
replace representative = "Neil O'Brien" if representative == "Neil  O'Brien"
replace representative = "Ruth George" if representative == "Ruth  George"
replace representative = "Karen Lee" if representative == "Karen  Lee"
replace representative = "Laura Pidcock" if representative == "Laura  Pidcock"
replace representative = "Luke Graham" if representative == "Luke  Graham"
replace representative = "Faisal Rashid" if representative == "Faisal  Rashid"

//Harmonise representative names: nicknames, marriage, hyphenation and capitalisation differences
replace representative = "Marsha de Cordova" if representative == "Marsha De Cordova"
replace representative = "Preet Kaur Gill" if representative == "Preet Gill"
replace representative = "Naseem Shah" if representative == "Naz Shah"
replace representative = "Bob Neill" if representative == "Robert Neill"
replace representative = "Daniel Poulter" if representative == "Dan Poulter"
replace representative = "Ed Miliband" if representative == "Edward Miliband"
replace representative = "Liz Saville-Roberts" if representative == "Liz Saville Roberts"
replace representative = "Steve Pound" if representative == "Stephen Pound"
replace representative = "John McNally" if representative == "John Mc Nally"
replace representative = "Nicholas Boles" if representative == "Nick Boles"
replace representative = "Andrew Slaughter" if representative == "Andy Slaughter"
replace representative = "John Martin McDonnell" if representative == "John McDonnell"
replace representative = "Diana R. Johnson" if representative == "Diana Johnson"
replace representative = "Jon Ashworth" if representative == "Jonathan Ashworth"
replace representative = "Jo Platt" if representative == "Joanne Platt"
replace representative = "Jo Johnson" if representative == "Joseph Johnson"
replace representative = "Ged Killen" if representative == "Gerard Killen"
replace representative = "Rebecca Long-Bailey" if representative == "Rebecca Long Bailey"
replace representative = "Nicholas Dakin" if representative == "Nic Dakin"
replace representative = "Tanmanjeet Singh Dhesi" if representative == "Tan Dhesi"
replace representative = "Christopher Hazzard" if representative == "Chris Hazzard"
replace representative = "Thomas Tugendhat" if representative == "Tom Tugendhat"
replace representative = "Vincent Cable" if representative == "Vince Cable"
replace representative = "Ed Vaizey" if representative == "Edward Vaizey"
replace representative = "Martin Docherty" if representative == "Martin Docherty-Hughes"
replace representative = "Matthew Hancock" if representative == "Matt Hancock"
replace representative = "Steven Baker" if representative == "Steve Baker"
replace representative = "Julia Lopez" if representative == "Julia Dockerill"
replace representative = "Nick Brown" if representative == "Nicholas Brown"
replace representative = "Chris Matheson" if representative == "Christian Matheson"
replace representative = "Suella Braverman" if representative == "Suella Fernandes"
replace representative = "Ian Paisley Jnr" if representative == "Ian Paisley"

//Store as the first version of the MP-level file
save "./Created auxiliary datasets/mp_level_data.dta", replace


//Load the list of MPs standing down in 2019, collected from Wikipedia (https://en.wikipedia.org/wiki/List_of_MPs_who_stood_down_at_the_2019_United_Kingdom_general_election)
import excel "./Other data sources/standing_down.xlsx", sheet("Standing_Down") firstrow clear

//Switch to the project-wide variable names and labels
rename (Constituency Representative) (constituency representative)
label variable representative "Name of MP"
label variable constituency "Name of respondent's constituency"

//Bring four constituency spellings in line with the candidate data
replace constituency = "Sheffield, Hallam" if constituency == "Sheffield Hallam"
replace constituency = "Liverpool, West Derby" if constituency == "Liverpool West Derby"
replace constituency = "Liverpool, Riverside" if constituency == "Liverpool Riverside"
replace constituency = "Ynys Mon" if constituency == "Ynys Môn"

//Every row in this sheet is an MP standing down, so the indicator is 1 throughout
gen standing_down = 1
label variable standing_down "Binary indicator whether MP is standing down in 2019"

//Merge onto the candidate rows of the same constituency; with update replace the
//candidate file's nonmissing values win for shared variables. Then overwrite the MP-level file
merge 1:m constituency using "./Created auxiliary datasets/mp_level_data.dta", update replace nogenerate
save "./Created auxiliary datasets/mp_level_data.dta", replace


//Read the House of Commons vote matrix (www.publicwhip.org.uk, provided by Ben Lauderdale)
import delimited "./Other data sources/votematrix-2017.txt", clear 

//Number the divisions by row, then turn the wide mpid* columns into one row per division and MP
generate vote_id = _n
reshape long mpid, i(vote_id) j(MP)

//After reshaping, the stacked mpid column holds the vote code
rename mpid vote

//Keep the long-format divisions file for the merge with the MP ids
save "./Other data sources/divisions_HoC.dta", replace

//Read the MP id table (www.publicwhip.org.uk, provided by Ben Lauderdale)
import delimited "./Other data sources/mps-2017.txt", clear 

//The id must be called MP to match the reshaped divisions file
rename mpid MP

//Attach every recorded vote to its MP
merge 1:m MP using "./Other data sources/divisions_HoC.dta"

//Discard non-participation (-9) and "both" (3), then collapse teller and ordinary votes into Yes/No
drop if inlist(vote, -9, 3)
recode vote (1 2 = 1 "Yes") (4 5 = 0 "No"), gen(vote_rec) //joins votes for tellers and other MPs

//Share of Yes votes within each party in each division
bysort vote_id party: egen yes_percent = mean(vote_rec)

//A vote is rebellious when it goes against the majority of the MP's party
gen rebellion = (vote_rec == 1 & yes_percent < 0.5) | (vote_rec == 0 & yes_percent > 0.5)

//Average rebellion per MP and party whip, keeping the date of the last vote cast under that whip
gen representative = firstname + " " + surname
gen last_vote = date(date, "YMD")
format last_vote %td
collapse (mean) rebellion (max) last_vote, by(representative party)

//For MPs who appear under several parties, retain only the party of their most recent vote
bysort representative: egen last_obs = max(last_vote)
format last_obs %td
keep if last_obs == last_vote
drop last_obs

//Amber Rudd is treated as independent (left party on 7th September 2019)
replace party = "Independent" if representative == "Amber Rudd"

//Independents are assigned a rebellion rate of 1 before conversion to percent
replace rebellion = 1 if party == "Independent"
replace rebellion = rebellion * 100

//Replace the HTML entity for the apostrophe in three names
replace representative = "Brendan O'Hara" if representative == "Brendan O&#39;Hara"
replace representative = "Jared O'Mara" if representative == "Jared O&#39;Mara"
replace representative = "Neil O'Brien" if representative == "Neil O&#39;Brien"

/*Missing due to Sinn Fein
Christopher Hazzard
Francie Molloy
Michelle Gildernew
Paul Maskey
Órfhlaith Begley
*/

/*Missing due to Speakers
Eleanor Laing 
John Bercow
Lindsay Hoyle
Rosie Winterton
*/

//Final names and labels
rename party last_party_whip
label variable representative "Name of MP"
label variable last_party_whip "Last party MP belonged to"
label variable last_vote "Date of MP's last vote in HoC"
label variable rebellion "Chosen MP's rate of rebellion in HoC divisions (%)"

//Merge with the MP-level file and discard candidates without any vote record,
//except the Sinn Fein MPs and Speakers listed above, who are kept
merge 1:m representative using "./Created auxiliary datasets/mp_level_data.dta"
drop if _merge == 2 & !( ///
	inlist(representative, "Christopher Hazzard", "Francie Molloy", "Michelle Gildernew", "Paul Maskey", "Órfhlaith Begley") | ///
	inlist(representative, "Eleanor Laing", "John Bercow", "Lindsay Hoyle", "Rosie Winterton") ///
	) //keep speakers and Sinn Fein MPs
drop _merge

//Fill in the gender of the two by-election winners
replace gender_MP = "Female" if inlist(representative, "Janet Daby", "Lisa Forbes")

//By-election winners are moved to the constituency they won,
//which differs from the one recorded for them in the 2017 candidate data
replace constituency = "Lewisham East" if representative == "Janet Daby"
replace constituency = "Newport West" if representative == "Ruth Jones"
replace constituency = "Peterborough" if representative == "Lisa Forbes"
replace constituency = "Brecon and Radnorshire" if representative == "Jane Dodds"

//Everyone not flagged as standing down is coded as running again. Jane Dodds is reset
//as well, because her flag was carried over from the MP of the constituency she ran in as candidate in 2017
replace standing_down = 0 if standing_down == . | representative == "Jane Dodds"
label define standing_down 0 "Running again" 1 "Standing down"
label values standing_down standing_down

//Overwrite the MP-level file
save "./Created auxiliary datasets/mp_level_data.dta", replace


//Read the text model estimates (columns 5 to 17 are forced to numeric) and drop the unnamed first column
import delimited "./Created auxiliary datasets/text_models_MPs.csv", numericcols(5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17) clear 
drop v1

//Label the text-based measures
label variable totalnowords "# MP's words in Parliament speeches"
label variable local_focus "MP's rhetorical focus on their local constituency"
label variable mentions_constituency "# MP's mentions of their constituency name"
label variable mentions_generic_constituency "# MP's generic mentions of their constituency"
label variable mentions_constituents "# MP's mentions of their constituents"
label variable women_words "# of women words used by MP"
label variable women_words_perc "Percentage of women words used by MP"
label variable ws_left_right "MP's left-right position (Wordscores)"
label variable ws_left_right_se "Standard error of MP's left-right position (Wordscores)"
label variable scores_mean_rep "MP's average rhetorical engagement with republican justification"
label variable scores_mean_plu "MP's average rhetorical engagement with pluralist justification"
label variable scores_rep_plu_scale "MP's average rhetorical engagement with republican justification minus their average engagement with pluralist justification"

//Use the common merge key and strip titles / update surnames where the sources differ
rename mp representative
replace representative = "David Amess" if representative == "Sir David Amess"
replace representative = "Cheryl Gillan" if representative == "Dame Cheryl Gillan"
replace representative = "Caroline Johnson" if representative == "Dr Caroline Johnson"
replace representative = "Margaret Beckett" if representative == "Miss Margaret Jackson"
replace representative = "Julia Lopez" if representative == "Julia Dockerill"
replace representative = "Suella Braverman" if representative == "Suella Fernandes"

/*Missing due to Sinn Fein
Christopher Hazzard
Francie Molloy
Michelle Gildernew
Paul Maskey
Órfhlaith Begley
*/

/*MPs who won by-election but never spoke
Jane Dodds
Lisa Forbes
Ruth Jones
*/

//Mike Freer and Rebecca Harris never spoke in the Commons

//Merge with the MP-level file; rows found only in the MP-level file are dropped
//unless they belong to one of the MPs listed above
merge 1:m representative using "./Created auxiliary datasets/mp_level_data.dta", update replace
drop if _merge == 2 & !( ///
	inlist(representative, "Christopher Hazzard", "Francie Molloy", "Michelle Gildernew", "Paul Maskey", "Órfhlaith Begley") | ///
	inlist(representative, "Jane Dodds", "Lisa Forbes", "Ruth Jones", "Mike Freer", "Rebecca Harris") ///
	) //keep those that did not speak
drop _merge

//List duplicates, then remove candidates who were not elected but share their name with an MP
duplicates list representative rebellion women_words
drop if (representative == "Angela Smith" & constituency == "Wealden") | ///
	(representative == "Chris Evans" & constituency == "Swansea East") | ///
	(representative == "Darren Jones" & constituency == "Islwyn") | ///
	(representative == "David Jones" & constituency == "Blackley and Broughton") | ///
	(representative == "Gareth Thomas" & constituency == "Clwyd West") | ///
	(representative == "John Hayes" & constituency == "Hitchin and Harpenden") | ///
	(representative == "Liam Byrne" & constituency == "Bury St Edmunds") | ///
	(representative == "Ronnie Campbell" & constituency == "Ross, Skye and Lochaber") | ///
	(representative == "Steven Baker" & constituency == "Ellesmere Port and Neston")

/*
651 MPs are left. Four constituencies had by-elections, should be 654 MPs.
But Sinn Fein MPs Elisha McCallion (Foyle), Mickey Brady (Newry and Armagh) 
and Órfhlaith Begley (West Tyrone) are entirely missing.*/

//Overwrite the MP-level file
save "./Created auxiliary datasets/mp_level_data.dta", replace


//Load IRT estimates and drop the unnamed first column
import delimited "./Created auxiliary datasets/irt_models_MPs.csv", clear 
drop v1

//Rename variables as necessary
rename mp representative

//Merge with other MP-level data
merge 1:m representative using "./Created auxiliary datasets/mp_level_data.dta", update replace
drop _merge

//21 MPs missing who did not vote on Brexit or are speaker/Sinn Fein

//Relabel variables.
//The period named in each label now matches the period in the variable name:
//lower_period2_sq was previously labelled "period 1" and upper_period1_sq "period 2".
label variable lower_period1_sq "Lower bound of 95% credible interval for MP's ideal point on Brexit (period 1)"
label variable lower_period2_sq "Lower bound of 95% credible interval for MP's ideal point on Brexit (period 2)"
label variable lower_change_sq "Lower bound of 95% credible interval for MP's ideal point on Brexit (period 2 - period 1)"
label variable upper_period1_sq "Upper bound of 95% credible interval for MP's ideal point on Brexit (period 1)"
label variable upper_period2_sq "Upper bound of 95% credible interval for MP's ideal point on Brexit (period 2)"
label variable upper_change_sq "Upper bound of 95% credible interval for MP's ideal point on Brexit (period 2 - period 1)"
label variable means_period1_sq "Ideal point on Brexit (period 1)"
label variable means_period2_sq "Ideal point on Brexit (period 2)"
label variable change_sq "Change in ideal point on Brexit (period 2 - period 1)"

//Save data
save "./Created auxiliary datasets/mp_level_data.dta", replace



//////////////////////////////////
//Merge MP and constituency data
//////////////////////////////////

//Attach the constituency-level covariates to the MP-level data currently in memory
merge m:1 constituency using "./Created auxiliary datasets/constituency_level_data.dta", nogenerate

/*Only the three constituencies of Sinn Fein MPs for which all data
is missing (Foyle, West Tyrone and Newry and Armagh) are missing */

//In the four by-election constituencies the stored majority is the by-election figure,
//so it is set to missing for the other MP listed for the same constituency
replace majority_size = . if ///
	(constituency == "Brecon and Radnorshire" & representative == "Chris Davies") | ///
	(constituency == "Peterborough" & representative == "Fiona Onasanya") | ///
	(constituency == "Newport West" & representative == "Paul Flynn") | ///
	(constituency == "Lewisham East" & representative == "Heidi Alexander")

//Numeric version of the last party whip
encode last_party_whip, gen(party_whip_enc)

//Responsiveness measure: absolute residual from regressing the change in Brexit ideal point
//on the period-1 ideal point and the constituency Brexit vote, estimated and predicted
//only for seats with a majority below 10%
local marginal_seats "majority_size < 10"
regress change_sq c.means_period1_sq c.brexit_vote if `marginal_seats'
predict responsiveness if `marginal_seats', residuals
replace responsiveness = abs(responsiveness)
label variable responsiveness "Measure of MP's responsiveness on Brexit"

//Plot the predicted change across constituency Brexit vote shares and export the figure
margins, at(brexit_vote = (0.3 (0.1) 0.7))
marginsplot, scheme(lean2) recast(line) recastci(rline) ///
	ylabel(,glcolor(none) glwidth(vvthin)) ciopts(lpattern(longdash)) ///
	title("") xtitle("{bf:Brexit vote in constituency}") ///
	ytitle("{bf:Change towards/from Brexit}" "{bf:after election was announced}", size(medium)) ///
	xlab(0.35 "35%" 0.5 "50%" 0.65 "65%") xscale(range(0.3 0.7))
graph export "./Tables and Figures/brexit_responsiveness.png", height(1500) replace

//Store the combined MP and constituency file
save "./Created auxiliary datasets/mp_constituency_data.dta", replace



////////////////////////
//Prepare survey data
////////////////////////

//Read the survey export
//import excel "./Prolific survey data/Prolific_Survey_Data.xlsx", sheet("Data") firstrow clear
import excel "./Prolific survey data/Multidimensional Representation Main Data.xlsx", sheet("Sheet0") firstrow clear

//Exclude respondents who declined to participate
drop if Consent == "I do not want to participate in this study."

//Label the respondent identifier
label variable ResponseId "Unique identifier for each respondent"

//Approximate age: 2019 minus the reported birth value
gen age = 2019 - Birth 
label variable age "Respondent's approximate age in years"

//Sex dummy built from the encoded Gender item: code 1 becomes 1,
//codes 3 ("Other") and 4 ("Prefer not to say") become missing, everything else 0.
//The codes depend on the alphabetical order in which encode numbers the answer strings.
encode Gender, gen(gender)
gen female = cond(inlist(gender, 3, 4), ., gender == 1)
//Reassign sex for "Other" and "Prefer not to say" from Prolific metadata (simplification to retain entire sample)
replace female = 0 if inlist(ResponseId, "R_2SvLYA6hiznKQRu", "R_33lu1qQez0A1iBl", "R_1OHzgvQh2XBamLz", "R_29bqqcKOSkEFsH0", "R_2B41HVNfBguw6iT", "R_27375MUGxDBfE1k")
replace female = 1 if ResponseId == "R_332qwg2GnpFQU9W"
//Value and variable labels for the sex dummy
label define female 0 "Male" 1 "Female"
label values female female
label variable female "Dummy variable for sex of respondent"

//University degree dummy: encoded education categories 18 and 19 count as a degree
encode Education, gen(education)
recode education (18 19 = 1 "University degree") (nonmissing = 0 "No university degree"), gen(uni_degree)
label variable uni_degree "Dummy variable for respondents holding university degrees"

//Left-right self-placement: strip the text from the two labelled endpoints, then convert to numeric
replace Left_Right = "10" if Left_Right == "10 - Right"
replace Left_Right = "0" if Left_Right == "0 - Left"
destring Left_Right, gen(left_right)
label variable left_right "Left-right self-placement of respondent (0-10 scale)"

//Numeric version of the party the respondent feels close to
encode Party_Close, gen(party_close)
label variable party_close "Party respondent feels close to"

//Both scales arrive as strings: plain digits for the inner points and
//"digit - text" for the two labelled endpoints, which are reduced to the digit first.
//
//encode numbers the strings it actually observes in sorted order, so if any scale
//point were never chosen, every higher point would silently receive a code that is
//one too low. Predefining the value label with the fixed mapping "1"->1 ... "7"->7
//makes encode reuse that mapping, so codes always equal the scale value.
//When all seven points are observed the result is identical to the plain encode.

//Create justification scale
replace Justification_Scale = "1" if Justification_Scale == "1 - People like me"
replace Justification_Scale = "7" if Justification_Scale == "7 - Society as a whole"
label define justification_dimension 1 "1" 2 "2" 3 "3" 4 "4" 5 "5" 6 "6" 7 "7"
encode Justification_Scale, gen(justification_dimension) label(justification_dimension)
label variable justification_dimension "Respondent's demand for republican justification"

//Create personalization scale
replace Party_Independent = "1" if Party_Independent == "1 - Very little"
replace Party_Independent = "7" if Party_Independent == "7 - A lot"
label define personalization_dimension 1 "1" 2 "2" 3 "3" 4 "4" 5 "5" 6 "6" 7 "7"
encode Party_Independent, gen(personalization_dimension) label(personalization_dimension)
label variable personalization_dimension "Respondent's demand for party independence"

//Responsiveness scale: the six answer options of Tradeoff_Votes are numbered 1 to 6
//in the order listed below; any other answer stays missing
gen responsiveness_dimension = .
local code = 0
foreach answer in "Always follow own convictions" "Primarily follow own convictions" "Rather follow own convictions" "Rather follow voters" "Primarily follow voters" "Always follow voters" {
	local ++code
	replace responsiveness_dimension = `code' if Tradeoff_Votes == "`answer'"
}
label variable responsiveness_dimension "Respondent's demand for high sanction-sensitivity"

//Create surrogation scale from the six identification items MPs_1 ... MPs_6.
//For each item:
//  1. reduce the two labelled endpoints to their digit,
//  2. encode the string into s_MP#,
//  3. treat a missing answer as no identification at all (scale value 1).
//
//encode numbers the strings it actually observes in sorted order, so an item on
//which some scale point was never chosen would get shifted codes. The value label
//is therefore predefined with the fixed mapping "1"->1 ... "7"->7, which encode
//then reuses. When all seven points are observed the result is identical to the
//plain encode.
forvalues i = 1/6 {
	replace MPs_`i' = "1" if MPs_`i' == "1 - Not at all"
	replace MPs_`i' = "7" if MPs_`i' == "7 - Very much"

	label define s_MP`i' 1 "1" 2 "2" 3 "3" 4 "4" 5 "5" 6 "6" 7 "7"
	encode MPs_`i', gen(s_MP`i') label(s_MP`i')

	//Make assumption that missing values indicate no identification at all (re-assign scale value "1")
	replace s_MP`i' = 1 if s_MP`i' == .
}

//Calculate territorial surrogation as maximum identification with non-constituency MP minus identification with constituency MP
//(item 3 is the constituency MP and is therefore left out of the maximum)
egen max_surrogation = rowmax(s_MP1 s_MP2 s_MP4 s_MP5 s_MP6)
gen surrogation_dimension = max_surrogation - s_MP3 
label variable surrogation_dimension "Respondent's demand for territorial surrogation"


//GET DATA ABOUT LOCAL MP

//Key each respondent on their own MP and constituency and pull in the MP-level file;
//rows that exist only in the MP-level file are not kept
gen representative = MP
gen constituency = Constituency
merge m:1 representative constituency using "./Created auxiliary datasets/mp_level_data.dta", keep(1 3) nogenerate

//Same-sex indicator: 1 when respondent and local MP are both male or both female, 0 otherwise
tab female gender_MP
gen same_sex = (female == 0 & gender_MP == "Male") | (female == 1 & gender_MP == "Female")
label variable same_sex "Dummy variable for whether respondent and local MP have same sex"
label define same_sex 1 "Same sex" 0 "Different sex"
label values same_sex same_sex

//Pull in the party of the local MP by constituency
merge m:1 constituency using "./Other data sources/mp_data.dta", keep(1 3) nogenerate

//Copartisanship: align three party names with the survey wording, then compare with Party_Close
replace party = "Labour" if party == "Labour/Co-operative"
replace party = "Scottish National Party (SNP)" if party == "Scottish National Party"
replace party = "Green Party" if party == "Green"
gen copartisan = (party == Party_Close)
label variable copartisan "Dummy variable for whether respondent feels close to party of local MP"
label define copartisan 1 "Local MP's party is close party" 0 "Local MP's party is not close party"
label values copartisan copartisan
rename party party_local
label variable party_local "Party of local MP"

//Local MP standing down
rename standing_down standing_down_local
label variable standing_down_local "Dummy variable whether local MP is standing down in 2019"

//Get all other covariates for the local MP (rows found only in the using file are not kept)
merge m:1 representative constituency using "./Created auxiliary datasets/mp_constituency_data.dta", update replace keep(1 3 4 5) nogenerate

//Mark the local MP's covariates with the suffix _local
foreach v in ws_left_right women_words_perc scores_rep_plu_scale last_party_whip rebellion gender_MP unemployment brexit_vote party_whip_enc responsiveness local_focus {
	rename `v' `v'_local
}

//Relabel data
label variable unemployment_local "Unemployment rate in local constituency (%)"

//Keep a copy of the local MP's name and constituency before the keys are reused
gen local_MP = representative
label variable local_MP "Name of local MP"
gen local_constituency = constituency
label variable local_constituency "Name of local constituency"

//Remove variables that are no longer needed
drop last_vote party majority_size uri


//GET DATA ABOUT CHOSEN MP

//Record chosen representative and her constituency
replace representative = RP_chosen
//NOTE(review): the next line removes the last character unconditionally; it is only
//correct if every RP_chosen value ends in exactly one superfluous space - confirm
replace representative = substr(representative, 1, strlen(representative)-1) //get rid of a superfluous space at the end of each name
gen representative_constituency = constituency if representative == local_MP
replace representative_constituency = "Uxbridge and South Ruislip" if representative == "Boris Johnson"
replace representative_constituency = "Islington North" if representative == "Jeremy Corbyn"
replace representative_constituency = "East Dunbartonshire" if representative == "Jo Swinson"
replace representative_constituency = "Ross, Skye and Lochaber" if representative == "Ian Blackford"
replace representative_constituency = Constituency_other if representative_constituency == ""
label variable representative_constituency "Name of chosen MP's constituency"

//Get all other covariates for chosen MP
replace constituency = representative_constituency

//Covariates that are still in memory under the using file's variable names belong to
//the respondent's LOCAL MP (left over from the earlier merge with the same file).
//merge, update replace never overwrites a master value with a missing using value and
//does not touch unmatched master rows, so those local-MP values would survive for
//respondents whose chosen MP is not found or has a missing value on a covariate,
//and would then be stored as the chosen MP's values. Dropping every non-key variable
//that the using file provides guarantees that what remains after the merge comes
//from the chosen MP's record, or is missing.
quietly describe using "./Created auxiliary datasets/mp_constituency_data.dta", varlist
local mp_covariates `r(varlist)'
local merge_keys representative constituency
local mp_covariates : list mp_covariates - merge_keys
foreach v of local mp_covariates {
	capture drop `v'
}

merge m:1 representative constituency using "./Created auxiliary datasets/mp_constituency_data.dta", update replace
drop if _merge == 2
drop _merge

//Create variable of chosen MP standing down
rename standing_down standing_down_representative
label variable standing_down_representative "Dummy variable whether chosen MP is standing down in 2019"

//Rename unemployment rate and Brexit vote in chosen MP's constituency
rename unemployment unemployment_representative
label variable unemployment_representative "Unemployment rate in chosen MP's constituency (%)"
rename brexit_vote brexit_vote_representative

//Create variable for sex of MP
gen female_chosen_MP = .
replace female_chosen_MP = 1 if gender_MP == "Female"
replace female_chosen_MP = 0 if gender_MP == "Male"
label variable female_chosen_MP "Binary indicator for whether chosen MP is female"

//Drop variables not needed
drop gender constituency last_party_whip last_vote Consent Constituency Gender Birth Education Party_Close Left_Right Justification_Scale Party_Independent Tradeoff_Votes Constituency_name MP MP_other Constituency_other RP_chosen s_MP1 s_MP2 s_MP3 s_MP4 s_MP5 s_MP6 max_surrogation education IPAddress Progress Finished RecordedDate RecipientLastName RecipientFirstName RecipientEmail ExternalReference LocationLatitude LocationLongitude DistributionChannel UserLanguage ProlificID Right_to_Vote BS mpnumber party



//////////////////////////////////////////////////
//Consolidate and save data for constituent side
//////////////////////////////////////////////////

//Interview metadata: project-wide names and labels
rename (StartDate EndDate Durationinseconds ResponseId) (interview_start interview_end interview_duration unique_id)
label variable interview_start "Start date and time of interview"
label variable interview_end "End date and time of interview"
label variable interview_duration "Interview duration in seconds"
label variable unique_id "Unique case identifier"

//Identification items: name each after the MP it refers to
rename (MPs_1 MPs_2 MPs_3 MPs_4 MPs_5 MPs_6) (Johnson_as_rep Corbyn_as_rep local_MP_as_rep other_MP_as_rep Blackford_as_rep Swinson_as_rep)
foreach who in Johnson Corbyn Blackford Swinson {
	label variable `who'_as_rep "Respondent's rating of whether `who' is seen as representative"
}
label variable local_MP_as_rep "Respondent's rating of whether local MP is seen as representative"
label variable other_MP_as_rep "Respondent's rating of whether other MP is seen as representative"
label variable representative "Name of chosen MP / primary representative"

//The line below shows that about 97% of respondents provided ratings for Johnson and Corbyn - disabled in case you have not installed mdesc command
//mdesc Johnson_as_rep Corbyn_as_rep

//Binary sex indicator for the local MP (missing when the gender is neither "Male" nor "Female")
gen female_local_MP = cond(gender_MP_local == "Female", 1, cond(gender_MP_local == "Male", 0, .))
label variable female_local_MP "Binary indicator whether local MP is female"

//Numeric vote intention in a second referendum; unanswered cases get the code 999
encode Second_EUReferendum, gen(referendum)
replace referendum = 999 if referendum == .
label variable referendum "Respondent's vote intention in a second Brexit referendum"

//Move the main variables to the front of the dataset
order unique_id interview_start interview_end interview_duration female age uni_degree left_right party_close surrogation_dimension justification_dimension personalization_dimension responsiveness_dimension Johnson_as_rep Corbyn_as_rep Swinson_as_rep Blackford_as_rep local_MP_as_rep other_MP_as_rep local_MP local_constituency representative representative_constituency same_sex copartisan rebellion majority_size won_again standing_down_local standing_down_representative unemployment_local unemployment_representative

//Write the constituent-side analysis dataset
save "analysis_dataset_constituents.dta", replace



/////////////////////////////////////////////////////
//Consolidate and save data for representative side
/////////////////////////////////////////////////////

//Start from the combined MP and constituency file
use "./Created auxiliary datasets/mp_constituency_data.dta", clear

//Binary gender indicator (missing when the gender is neither "Male" nor "Female")
gen gender_MP_enc = cond(gender_MP == "Female", 1, cond(gender_MP == "Male", 0, .))
label variable gender_MP_enc "Binary indicator for whether Female MPs"

//Numeric version of the last party whip
encode last_party_whip, gen(party_enc)
label variable party_enc "Last party membership of MP"

//Difference between the standardized share of women words and the standardized local focus
egen women_std = std(women_words_perc)
egen local_std = std(local_focus)
gen women_local = women_std - local_std
label variable women_local "Measure of MP's relative attention to women as opposed to local constituency issues"

//Remove two helper columns and write the representative-side analysis dataset
drop party mpnumber
save "analysis_dataset_representatives.dta", replace

